# Source code for eLife paper, Figure 9
#By Prof. Simon Dellicour
#Last updated 28 June 2022

library(diagram)
library(lubridate)
library(seraphim)
library(treeio)

# Run-time switches. `writingFiles` gates the write.table/write.tree/write calls of section 1;
# `showingPlots` is not referenced in the visible part of this script.
writingFiles = FALSE; showingPlots = FALSE

# Semicolon-separated sequence metadata (columns used below: taxa_id, country, county, datecollection)
# and the counties considered in the county-level analyses.
metadata = read.csv(file="IQT_TreeTime_Kenya.csv", header=TRUE, sep=";")
sampled_counties = c("Mombasa", "Kilifi", "Kwale", "Taita Taveta", "Tana River", "Lamu")

# 1. Preparing the input files for the discrete phylogeographic analyses

# Load the tree and strip the single quotes surrounding the tip labels.
tree = read.tree("IQT_TreeTime_Kenya.tre"); tree$tip.label = gsub("'","",tree$tip.label)
seqIDs = tree$tip.label

# Look up every tip in the metadata in a single pass (first matching row wins) and fail
# early, with the offending labels, instead of crashing on a zero-length assignment.
metadataRows = match(seqIDs, metadata[,"taxa_id"])
if (anyNA(metadataRows)) {
	stop("No metadata row found for tip(s): ", paste(seqIDs[is.na(metadataRows)], collapse=", "), call.=FALSE)
}
countries = as.character(metadata[metadataRows,"country"])
collectionDates = as.character(metadata[metadataRows,"datecollection"])
if (anyNA(countries)) {
	stop("Missing country for tip(s): ", paste(seqIDs[is.na(countries)], collapse=", "), call.=FALSE)
}
data = cbind(seqIDs,countries,collectionDates); colnames(data) = c("sequence_ID","country","collection_date")

# Two-state trait: every location that is not "Kenya" is collapsed into "other".
location = countries; location[location != "Kenya"] = "other"

# Dummy alignment ("NNNN" for every tip), interleaving the ">label" header and the sequence line.
txt = as.vector(rbind(paste0(">",seqIDs), "NNNN"))

# Trait table: one row per tip with its two-state location and its collection date.
tab = cbind(seqIDs, location, collectionDates)
colnames(tab) = c("trait","location","collection_date")

if (writingFiles) write.table(tab, "IQT_TreeTime_Kenya.txt", row.names=FALSE, quote=FALSE, sep="\t")
if (writingFiles) write.tree(tree, "IQT_TreeTime_Kenya_Newick_TEMP.tre")
if (writingFiles) write(txt, "IQT_TreeTime_Kenya.fasta")

# 2. Analysing the outputs of the preliminary discrete phylogeographic analysis 

burnIn = 41; computingHPDInterval = FALSE # N.B.: long analysis
if (computingHPDInterval) {
	# Read the posterior trees file line by line and separate the "tree STATE_" lines
	# (one per sampled tree) from all the other lines of the file.
	trees = scan("IQT_TreeTime_Kenya.trees", what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE)
	isTreeLine = grepl("tree STATE_",trees)
	indices1 = which(!isTreeLine); indices2 = which(isTreeLine)
	# Guard against (burnIn+1):length(indices2) counting backwards.
	if (length(indices2) <= burnIn) stop("The trees file holds no sampled tree beyond the burn-in of ", burnIn, call.=FALSE)
	mostRecentSamplingDate = max(decimal_date(ymd(data[,"collection_date"])))
	KenyaBranches_list = rep(NA,length(trees)); KenyaIntroductions_list = rep(NA,length(trees))
	KenyaTipBranches_list = rep(NA,length(trees)); Kenya_tMRCAs_list = list()
	for (i in (burnIn+1):length(indices2)) {
		# Build a single-tree file: every non-tree line but the last one, the i-th sampled
		# tree, then the last non-tree line (presumably the closing statement - TODO confirm).
		tree1 = trees[c(indices1[1:(length(indices1)-1)],indices2[i],indices1[length(indices1)])]
		# One path variable for write/read/remove: the original read and removed
		# "IQTreeTime_..." while writing "IQT_TreeTime_...", so the read could never succeed.
		sampledTreeFile = paste0("IQT_TreeTime_Kenya_sampled_tree_",i,".tree")
		write(tree1, sampledTreeFile)
		tree2 = readAnnotatedNexus(sampledTreeFile)
		KenyaBranches = 0; KenyaIntroductions = 0; KenyaTipBranches = 0; Kenya_tMRCAs = c()
		for (j in seq_len(dim(tree2$edge)[1])) {
			if (tree2$annotations[[j]]$location == "Kenya") {
				KenyaBranches = KenyaBranches + 1
				# Location at the start of the branch: the location of the parent branch or,
				# for a branch attached to the root, the root annotation.
				index = which(tree2$edge[,2]==tree2$edge[j,1])
				if (length(index) == 1) {
					parentLocation = tree2$annotations[[index]]$location
				} else {
					parentLocation = tree2$root.annotation$location
				}
				if (parentLocation != "Kenya") {
					# Introduction event: a Kenyan branch whose parent is located elsewhere.
					KenyaIntroductions = KenyaIntroductions + 1
					tMRCA = mostRecentSamplingDate-nodeheight(tree2,tree2$edge[j,1])
					Kenya_tMRCAs = c(Kenya_tMRCAs, tMRCA)
				}
				# A branch whose child node never appears as a parent node is a tip branch.
				if (!tree2$edge[j,2]%in%tree2$edge[,1]) {
					KenyaTipBranches = KenyaTipBranches + 1
				}
			}
		}
		KenyaBranches_list[i] = KenyaBranches
		KenyaIntroductions_list[i] = KenyaIntroductions
		KenyaTipBranches_list[i] = KenyaTipBranches
		Kenya_tMRCAs_list[[i]] = Kenya_tMRCAs
		file.remove(sampledTreeFile)
	}
	# N.B.: the reported interval is made of the 2.5% and 97.5% quantiles; `KenyaTipBranches`
	# holds the count obtained for the last processed tree.
	quantiles = quantile(KenyaIntroductions_list[!is.na(KenyaIntroductions_list)],probs=c(0.025,0.975))
	cat("A minimum number of ",median(KenyaIntroductions_list[!is.na(KenyaIntroductions_list)])," lineage introductions (95% HPD interval = [",
		quantiles[1],"-",quantiles[2],"])"," identified from the global phylogenetic analysis of ",KenyaTipBranches," SARS-CoV-2 sampled in Kenya",sep="")
	# A minimum number of 219 lineage introductions (95% HPD interval = [212-226]) identified from the global phylogenetic analysis of 1428 genomes sampled in Kenya
}

# 3. Identifying the different clusters (clades following introduction events)

# Classify the branches of the annotated tree: Kenyan branches, introduction events
# (Kenyan branch whose parent is located elsewhere) and Kenyan tip branches.
tree = readAnnotatedNexus("IQT_TreeTime_Kenya.tree")
nberOfBranches = dim(tree$edge)[1]
isKenyanBranch = rep(FALSE, nberOfBranches); isIntroduction = rep(FALSE, nberOfBranches)
for (i in seq_len(nberOfBranches)) {
	if (tree$annotations[[i]]$location == "Kenya") {
		isKenyanBranch[i] = TRUE
		# Location at the start of the branch: the location of the parent branch or,
		# for a branch attached to the root (no parent branch), the root annotation.
		index = which(tree$edge[,2]==tree$edge[i,1])
		if (length(index) == 1) {
			parentLocation = tree$annotations[[index]]$location
		} else {
			parentLocation = tree$root.annotation$location
		}
		if (parentLocation != "Kenya") isIntroduction[i] = TRUE
	}
}
# A branch whose child node never appears as a parent node is a tip branch.
isTipBranch = !tree$edge[,2]%in%tree$edge[,1]
kenyanBranches = which(isKenyanBranch); kenyanIntroductions = which(isIntroduction)
kenyanTipBranches = which(isKenyanBranch & isTipBranch)
sampledSequences = tree$tip.label[tree$edge[kenyanTipBranches,2]]
# For each introduction event, list the tip labels (quotes stripped) of the clade
# descending from the introduction branch: the whole subtree for an internal branch,
# the single tip for a tip branch.
clusters1 = lapply(kenyanIntroductions, function(introductionBranch) {
	node = tree$edge[introductionBranch,2]
	if (node%in%tree$edge[,1]) {
		subtree = tree_subset(tree, node, levels_back=0)
		gsub("'","",subtree$tip.label)
	} else {
		gsub("'","",tree$tip.label[node])
	}
})
# Nested clades: when one cluster is entirely contained in another one, its tips are
# removed from the containing cluster. Pairs are visited in the order (2,1), (3,1),
# (3,2), ... and each test uses the clusters as already updated. The guard avoids
# 2:length(clusters1) counting backwards when fewer than two clusters exist.
if (length(clusters1) >= 2) {
	for (i in 2:length(clusters1)) {
		for (j in 1:(i-1)) {
			if (all(clusters1[[i]]%in%clusters1[[j]])) {
				clusters1[[j]] = clusters1[[j]][!clusters1[[j]]%in%clusters1[[i]]]
			}
			if (all(clusters1[[j]]%in%clusters1[[i]])) {
				clusters1[[i]] = clusters1[[i]][!clusters1[[i]]%in%clusters1[[j]]]
			}
		}
	}
}
sampledSequences = gsub("'","",sampledSequences)
# Build (once) the sampling table of the Kenyan tips: sequence ID, decimal collection
# date and county. An existing "Sampling_from_Kenya.csv" is reused as is.
if (!file.exists("Sampling_from_Kenya.csv")) {
	# Single-pass lookup (first matching metadata row wins) with an explicit error
	# naming the sequences that have no metadata row.
	metadataRows = match(sampledSequences, metadata[,"taxa_id"])
	if (anyNA(metadataRows)) {
		stop("No metadata row found for sequence(s): ", paste(sampledSequences[is.na(metadataRows)], collapse=", "), call.=FALSE)
	}
	samplingData = matrix(nrow=length(sampledSequences), ncol=3)
	colnames(samplingData) = c("sequence_ID","collection_date","county")
	samplingData[,"sequence_ID"] = sampledSequences
	samplingData[,"collection_date"] = decimal_date(ymd(metadata[metadataRows,"datecollection"]))
	samplingData[,"county"] = as.character(metadata[metadataRows,"county"])
	write.csv(samplingData, "Sampling_from_Kenya.csv", quote=FALSE, row.names=FALSE)
}
samplingData = read.csv("Sampling_from_Kenya.csv", header=TRUE)
# For each cluster, build a two-column matrix ("collection_date", "county") with one row
# per sequence found exactly once in `samplingData`; row names are the sequence IDs.
# A cluster without any matching sequence yields a zero-row matrix (the original code
# failed on colnames(NULL) in that case).
centroids = list()
duplicatedIDs = samplingData[duplicated(samplingData[,"sequence_ID"]),"sequence_ID"]
clusters2 = lapply(clusters1, function(sequenceIDs) {
	rows = match(sequenceIDs, samplingData[,"sequence_ID"])
	# Keep the IDs matching a single row of the sampling table.
	keep = (!is.na(rows))&(!sequenceIDs%in%duplicatedIDs)
	rows = rows[keep]
	matrix(c(samplingData[rows,"collection_date"], samplingData[rows,"county"]), ncol=2,
		dimnames=list(sequenceIDs[keep], c("collection_date","county")))
})

# 4. Preparing the discrete phylogeographic analysis among counties

template = scan("BSSVS_template_file.xml", what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE); xml = c()

# Write "<outputDir>/All_clades_NEW.xml" by copying `template` line by line and, after
# each "Insert ..." placeholder line, emitting one XML block per retained clade. Also
# writes the starting tree of each retained clade to "<outputDir>/Clade_<j>.tre".
#   template             character vector, the lines of the XML template
#   clusters             list of per-clade matrices (columns "collection_date" and "county",
#                        row names = sequence IDs); elements that are not matrices are skipped
#   tree                 annotated tree from which the clade subtrees are extracted
#   introductionBranches branch index of the introduction event of each clade
#   outputDir            output directory (without trailing slash)
#   logEvery             value of the "logEvery" attribute of the logTree elements (string)
# Returns (invisibly) the indices of the retained clades.
writeCladesXML = function(template, clusters, tree, introductionBranches, outputDir, logEvery) {
	# A clade is retained when it holds at least three sequences with a known county.
	retained = which(vapply(clusters, function(cluster) {
		(!is.null(dim(cluster)))&&(dim(cluster)[1] >= 3)&&(sum(!is.na(cluster[,"county"])) >= 3)
	}, logical(1)))
	sink(file=paste0(outputDir,"/All_clades_NEW.xml"))
	on.exit(sink(NULL), add=TRUE) # release the sink even when an error occurs
	for (i in seq_along(template)) {
		cat(template[i],"\n")
		if (grepl("Insert taxa blocks",template[i])) {
			for (j in retained) {
				m = clusters[[j]]
				cat(paste0("\t<taxa id=\"taxa_",j,"\">","\n"))
				for (k in which(!is.na(m[,"county"]))) {
					cat(paste0("\t\t<taxon id=\"",row.names(m)[k],"\">","\n"))
					cat(paste0("\t\t\t<date value=\"",m[k,"collection_date"],"\" direction=\"forwards\" units=\"years\"/>","\n"))
					cat("\t\t\t<attr name=\"location\">\n")
					cat(paste0("\t\t\t\t",m[k,"county"],"\n"))
					cat("\t\t\t</attr>\n")
					cat("\t\t</taxon>\n")
				}
				cat("\t</taxa>","\n")
			}
		}
		if (grepl("Insert alignment blocks",template[i])) {
			for (j in retained) {
				m = clusters[[j]]
				cat(paste0("\t<alignment id=\"alignment_",j,"\" dataType=\"nucleotide\">","\n"))
				for (k in which(!is.na(m[,"county"]))) {
					cat("\t\t<sequence>\n")
					cat(paste0("\t\t\t<taxon idref=\"",row.names(m)[k],"\"/>","\n"))
					cat("\t\t\tNNNN\n")
					cat("\t\t</sequence>\n")
				}
				cat("\t</alignment>","\n")
			}
		}
		if (grepl("Insert pattern blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<patterns id=\"patterns_",j,"\" from=\"1\" strip=\"false\">","\n"))
				cat(paste0("\t\t<alignment idref=\"alignment_",j,"\"/>","\n"))
				cat("\t</patterns>","\n")
			}
		}
		if (grepl("Insert starting tree blocks",template[i])) {
			for (j in retained) {
				# Subtree of the clade, pruned to the sequences with a known county.
				tre = tree_subset(tree, tree$edge[introductionBranches[j],2], levels_back=0)
				tips = row.names(clusters[[j]]); tips = tips[which(!is.na(clusters[[j]][,"county"]))]
				tips_to_drop = tre$tip.label[which(!gsub("'","",tre$tip.label)%in%tips)]
				if (length(tips_to_drop) > 0) tre = ape::drop.tip(tre, tips_to_drop)
				# The Newick string goes through the clade's own file (indexed by j, not by
				# the template line index i), which is then overwritten by the NEXUS version.
				cladeFile = paste0(outputDir,"/Clade_",j,".tre")
				write.tree(tre, cladeFile)
				newick = scan(cladeFile, what="", sep="\n", quiet=TRUE)
				write(c("#NEXUS","begin trees;",paste0("\ttree tree_1 = [&R] ",newick),"end;"), cladeFile)
				cat(paste0("\t<empiricalTreeDistributionModel id=\"treeModel_",j,"\" fileName=\"Clade_",j,".tre\">","\n"))
				cat(paste0("\t\t<taxa idref=\"taxa_",j,"\"/>","\n"))
				cat("\t</empiricalTreeDistributionModel>","\n")
			}
		}
		if (grepl("Insert location.pattern blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<attributePatterns id=\"location.pattern_",j,"\" attribute=\"location\">","\n"))
				cat(paste0("\t\t<taxa idref=\"taxa_",j,"\"/>","\n"))
				cat(paste0("\t\t<generalDataType idref=\"location.dataType\"/>","\n"))
				cat("\t</attributePatterns>","\n")
			}
		}
		if (grepl("Insert rateStatistic blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<rateStatistic id=\"location.meanRate_",j,"\" name=\"location.meanRate_",j,"\" mode=\"mean\" internal=\"true\" external=\"true\">","\n"))
				cat(paste0("\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat("\t</rateStatistic>","\n")
			}
		}
		if (grepl("Insert ancestralTreeLikelihood blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<ancestralTreeLikelihood id=\"location.treeLikelihood_",j,"\" stateTagName=\"location.states\" useUniformization=\"true\" saveCompleteHistory=\"false\" logCompleteHistory=\"false\">","\n"))
				cat(paste0("\t\t<attributePatterns idref=\"location.pattern_",j,"\"/>","\n"))
				cat(paste0("\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t<siteModel idref=\"location.siteModel\"/>","\n"))
				cat(paste0("\t\t<generalSubstitutionModel idref=\"location.model\"/>","\n"))
				cat(paste0("\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat(paste0("\t\t<frequencyModel id=\"location.root.frequencyModel_",j,"\" normalize=\"true\">","\n"))
				cat(paste0("\t\t\t<generalDataType idref=\"location.dataType\"/>","\n"))
				cat(paste0("\t\t\t<frequencies>","\n"))
				cat(paste0("\t\t\t\t<parameter id=\"location.root.frequencies_",j,"\" dimension=\"6\"/>","\n"))
				cat(paste0("\t\t\t</frequencies>","\n"))
				cat(paste0("\t\t</frequencyModel>","\n"))
				cat(paste0("\t</ancestralTreeLikelihood>","\n"))
			}
		}
		if (grepl("Insert deltaExchange blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t<deltaExchange delta=\"0.75\" weight=\"1\">","\n"))
				cat(paste0("\t\t\t<parameter idref=\"location.root.frequencies_",j,"\"/>","\n"))
				cat("\t\t</deltaExchange>","\n")
			}
		}
		if (grepl("Insert uniformPrior blocks",template[i])) {
			# NOTE(review): the ctmcScalePrior blocks and the uniformPrior blocks are both
			# triggered by the same "Insert uniformPrior blocks" placeholder - confirm that
			# the template has no dedicated placeholder for the ctmcScalePrior blocks.
			for (j in retained) {
				cat(paste0("\t\t\t\t<ctmcScalePrior>","\n"))
				cat(paste0("\t\t\t\t\t<ctmcScale>","\n"))
				cat(paste0("\t\t\t\t\t\t<parameter idref=\"location.clock.rate\"/>","\n"))
				cat(paste0("\t\t\t\t\t</ctmcScale>","\n"))
				cat(paste0("\t\t\t\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t\t\t</ctmcScalePrior>","\n"))
			}
			for (j in retained) {
				cat(paste0("\t\t\t\t<uniformPrior lower=\"0.0\" upper=\"1.0\">","\n"))
				cat(paste0("\t\t\t\t\t<parameter idref=\"location.root.frequencies_",j,"\"/>","\n"))
				cat(paste0("\t\t\t\t</uniformPrior>","\n"))
			}
		}
		if (grepl("Insert ancestralTreeLikelihood lines 1",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert rateStatistic lines",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t<rateStatistic idref=\"location.meanRate_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert ancestralTreeLikelihood lines 2",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert treeFileLog blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t<logTree id=\"treeFileLog_",j,"\" logEvery=\"",logEvery,"\" nexusFormat=\"true\" fileName=\"Clade_",j,".trees\" sortTranslationTable=\"true\">","\n"))
				cat(paste0("\t\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t\t<trait name=\"rate\" tag=\"location.rate\">","\n"))
				cat(paste0("\t\t\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat(paste0("\t\t\t</trait>","\n"))
				cat(paste0("\t\t\t<joint idref=\"joint\"/>","\n"))
				cat(paste0("\t\t\t<trait name=\"location.states\" tag=\"location\">","\n"))
				cat(paste0("\t\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
				cat(paste0("\t\t\t</trait>","\n"))
				cat(paste0("\t\t</logTree>","\n"))
			}
		}
	}
	invisible(retained)
}

# Make sure the output directory exists before redirecting the output to it.
dir.create("Phylogeographic_analysis", showWarnings=FALSE, recursive=TRUE)
writeCladesXML(template, clusters2, tree, kenyanIntroductions, "Phylogeographic_analysis", "1000")

# 5. Subsampling Kenyan counties according to the seroprevalence

# Stack the per-cluster tables into one matrix of Kenyan sequences (row names = sequence
# IDs) and keep the sequences with a known county.
allKenyanSequences1 = unlist(lapply(clusters2, row.names))
allKenyanSequences2 = do.call(rbind, clusters2)
row.names(allKenyanSequences2) = allKenyanSequences1
allKenyanSequences = allKenyanSequences2[which(!is.na(allKenyanSequences2[,"county"])),]

# Number of cases per 100,000 (one value per element of `sampled_counties`, same order)
# and number of available sequences per county.
cases_per_100000 = c(699, 169, 50, 251, 34, 243)
nberOfSamples = vapply(sampled_counties, function(county) {
	sum(allKenyanSequences[,"county"]==county, na.rm=TRUE)
}, integer(1), USE.NAMES=FALSE)

# Subsample sizes proportional to `cases_per_100000`, scaled by the smallest
# sequences-per-case ratio observed among the counties.
ratios = nberOfSamples/cases_per_100000
subsampling = round(cases_per_100000*min(ratios))
	# subsampling = c(181, 44, 13, 65, 9, 63)

# 6. Preparing the replicated discrete phylogeographic analyses

nberOfReplicates = 10

# Write "<outputDir>/All_clades_NEW.xml" by copying `template` line by line and, after
# each "Insert ..." placeholder line, emitting one XML block per retained clade. Also
# writes the starting tree of each retained clade to "<outputDir>/Clade_<j>.tre".
#   template             character vector, the lines of the XML template
#   clusters             list of per-clade matrices (columns "collection_date" and "county",
#                        row names = sequence IDs); elements that are not matrices are skipped
#   tree                 annotated tree from which the clade subtrees are extracted
#   introductionBranches branch index of the introduction event of each clade
#   outputDir            output directory (without trailing slash)
#   logEvery             value of the "logEvery" attribute of the logTree elements (string)
# Returns (invisibly) the indices of the retained clades.
writeCladesXML = function(template, clusters, tree, introductionBranches, outputDir, logEvery) {
	# A clade is retained when it holds at least three sequences with a known county.
	retained = which(vapply(clusters, function(cluster) {
		(!is.null(dim(cluster)))&&(dim(cluster)[1] >= 3)&&(sum(!is.na(cluster[,"county"])) >= 3)
	}, logical(1)))
	sink(file=paste0(outputDir,"/All_clades_NEW.xml"))
	on.exit(sink(NULL), add=TRUE) # release the sink even when an error occurs
	for (i in seq_along(template)) {
		cat(template[i],"\n")
		if (grepl("Insert taxa blocks",template[i])) {
			for (j in retained) {
				m = clusters[[j]]
				cat(paste0("\t<taxa id=\"taxa_",j,"\">","\n"))
				for (k in which(!is.na(m[,"county"]))) {
					cat(paste0("\t\t<taxon id=\"",row.names(m)[k],"\">","\n"))
					cat(paste0("\t\t\t<date value=\"",m[k,"collection_date"],"\" direction=\"forwards\" units=\"years\"/>","\n"))
					cat("\t\t\t<attr name=\"location\">\n")
					cat(paste0("\t\t\t\t",m[k,"county"],"\n"))
					cat("\t\t\t</attr>\n")
					cat("\t\t</taxon>\n")
				}
				cat("\t</taxa>","\n")
			}
		}
		if (grepl("Insert alignment blocks",template[i])) {
			for (j in retained) {
				m = clusters[[j]]
				cat(paste0("\t<alignment id=\"alignment_",j,"\" dataType=\"nucleotide\">","\n"))
				for (k in which(!is.na(m[,"county"]))) {
					cat("\t\t<sequence>\n")
					cat(paste0("\t\t\t<taxon idref=\"",row.names(m)[k],"\"/>","\n"))
					cat("\t\t\tNNNN\n")
					cat("\t\t</sequence>\n")
				}
				cat("\t</alignment>","\n")
			}
		}
		if (grepl("Insert pattern blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<patterns id=\"patterns_",j,"\" from=\"1\" strip=\"false\">","\n"))
				cat(paste0("\t\t<alignment idref=\"alignment_",j,"\"/>","\n"))
				cat("\t</patterns>","\n")
			}
		}
		if (grepl("Insert starting tree blocks",template[i])) {
			for (j in retained) {
				# Subtree of the clade, pruned to the retained sequences with a known county.
				tre = tree_subset(tree, tree$edge[introductionBranches[j],2], levels_back=0)
				tips = row.names(clusters[[j]]); tips = tips[which(!is.na(clusters[[j]][,"county"]))]
				tips_to_drop = tre$tip.label[which(!gsub("'","",tre$tip.label)%in%tips)]
				if (length(tips_to_drop) > 0) tre = ape::drop.tip(tre, tips_to_drop)
				# The Newick string goes through the clade's own file (indexed by j, not by
				# the template line index i), which is then overwritten by the NEXUS version.
				cladeFile = paste0(outputDir,"/Clade_",j,".tre")
				write.tree(tre, cladeFile)
				newick = scan(cladeFile, what="", sep="\n", quiet=TRUE)
				write(c("#NEXUS","begin trees;",paste0("\ttree tree_1 = [&R] ",newick),"end;"), cladeFile)
				cat(paste0("\t<empiricalTreeDistributionModel id=\"treeModel_",j,"\" fileName=\"Clade_",j,".tre\">","\n"))
				cat(paste0("\t\t<taxa idref=\"taxa_",j,"\"/>","\n"))
				cat("\t</empiricalTreeDistributionModel>","\n")
			}
		}
		if (grepl("Insert location.pattern blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<attributePatterns id=\"location.pattern_",j,"\" attribute=\"location\">","\n"))
				cat(paste0("\t\t<taxa idref=\"taxa_",j,"\"/>","\n"))
				cat(paste0("\t\t<generalDataType idref=\"location.dataType\"/>","\n"))
				cat("\t</attributePatterns>","\n")
			}
		}
		if (grepl("Insert rateStatistic blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<rateStatistic id=\"location.meanRate_",j,"\" name=\"location.meanRate_",j,"\" mode=\"mean\" internal=\"true\" external=\"true\">","\n"))
				cat(paste0("\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat("\t</rateStatistic>","\n")
			}
		}
		if (grepl("Insert ancestralTreeLikelihood blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t<ancestralTreeLikelihood id=\"location.treeLikelihood_",j,"\" stateTagName=\"location.states\" useUniformization=\"true\" saveCompleteHistory=\"false\" logCompleteHistory=\"false\">","\n"))
				cat(paste0("\t\t<attributePatterns idref=\"location.pattern_",j,"\"/>","\n"))
				cat(paste0("\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t<siteModel idref=\"location.siteModel\"/>","\n"))
				cat(paste0("\t\t<generalSubstitutionModel idref=\"location.model\"/>","\n"))
				cat(paste0("\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat(paste0("\t\t<frequencyModel id=\"location.root.frequencyModel_",j,"\" normalize=\"true\">","\n"))
				cat(paste0("\t\t\t<generalDataType idref=\"location.dataType\"/>","\n"))
				cat(paste0("\t\t\t<frequencies>","\n"))
				cat(paste0("\t\t\t\t<parameter id=\"location.root.frequencies_",j,"\" dimension=\"6\"/>","\n"))
				cat(paste0("\t\t\t</frequencies>","\n"))
				cat(paste0("\t\t</frequencyModel>","\n"))
				cat(paste0("\t</ancestralTreeLikelihood>","\n"))
			}
		}
		if (grepl("Insert deltaExchange blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t<deltaExchange delta=\"0.75\" weight=\"1\">","\n"))
				cat(paste0("\t\t\t<parameter idref=\"location.root.frequencies_",j,"\"/>","\n"))
				cat("\t\t</deltaExchange>","\n")
			}
		}
		if (grepl("Insert uniformPrior blocks",template[i])) {
			# NOTE(review): the ctmcScalePrior blocks and the uniformPrior blocks are both
			# triggered by the same "Insert uniformPrior blocks" placeholder - confirm that
			# the template has no dedicated placeholder for the ctmcScalePrior blocks.
			for (j in retained) {
				cat(paste0("\t\t\t\t<ctmcScalePrior>","\n"))
				cat(paste0("\t\t\t\t\t<ctmcScale>","\n"))
				cat(paste0("\t\t\t\t\t\t<parameter idref=\"location.clock.rate\"/>","\n"))
				cat(paste0("\t\t\t\t\t</ctmcScale>","\n"))
				cat(paste0("\t\t\t\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t\t\t</ctmcScalePrior>","\n"))
			}
			for (j in retained) {
				cat(paste0("\t\t\t\t<uniformPrior lower=\"0.0\" upper=\"1.0\">","\n"))
				cat(paste0("\t\t\t\t\t<parameter idref=\"location.root.frequencies_",j,"\"/>","\n"))
				cat(paste0("\t\t\t\t</uniformPrior>","\n"))
			}
		}
		if (grepl("Insert ancestralTreeLikelihood lines 1",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert rateStatistic lines",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t<rateStatistic idref=\"location.meanRate_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert ancestralTreeLikelihood lines 2",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
			}
		}
		if (grepl("Insert treeFileLog blocks",template[i])) {
			for (j in retained) {
				cat(paste0("\t\t<logTree id=\"treeFileLog_",j,"\" logEvery=\"",logEvery,"\" nexusFormat=\"true\" fileName=\"Clade_",j,".trees\" sortTranslationTable=\"true\">","\n"))
				cat(paste0("\t\t\t<treeModel idref=\"treeModel_",j,"\"/>","\n"))
				cat(paste0("\t\t\t<trait name=\"rate\" tag=\"location.rate\">","\n"))
				cat(paste0("\t\t\t\t<strictClockBranchRates idref=\"location.branchRates\"/>","\n"))
				cat(paste0("\t\t\t</trait>","\n"))
				cat(paste0("\t\t\t<joint idref=\"joint\"/>","\n"))
				cat(paste0("\t\t\t<trait name=\"location.states\" tag=\"location\">","\n"))
				cat(paste0("\t\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",j,"\"/>","\n"))
				cat(paste0("\t\t\t</trait>","\n"))
				cat(paste0("\t\t</logTree>","\n"))
			}
		}
	}
	invisible(retained)
}

# The template does not change between replicates: read it once.
template = scan("BSSVS_template_file.xml", what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE); xml = c()
for (h in seq_len(nberOfReplicates)) {
	# Random subsample (without replacement) of `subsampling[i]` sequences per county.
	# N.B.: no seed is set in this script, so the replicates differ between runs.
	toSubsample = c()
	for (i in seq_along(sampled_counties)) {
		indices = which(allKenyanSequences[,"county"]==sampled_counties[i])
		# sample.int() on the positions: sample(indices, ...) would draw from 1:indices
		# when a county has a single sequence.
		toSubsample = c(toSubsample, indices[sample.int(length(indices), subsampling[i], replace=FALSE)])
	}
	subsample = allKenyanSequences[toSubsample,]
	# Restrict every cluster to the subsampled sequences (a single remaining row is
	# returned as a plain vector, no remaining row as a zero-row matrix).
	clusters3 = lapply(clusters2, function(cluster) {
		cluster[which(row.names(cluster)%in%row.names(subsample)),]
	})
	# Total number of subsampled sequences found in the clusters.
	n = 0
	for (cluster in clusters3) {
		if (length(cluster) > 0) {
			if (is.null(dim(cluster))) n = n+1 else n = n+dim(cluster)[1]
		}
	}
	replicateDir = paste0("Phylogeographic_replicates/Replicate_",h)
	dir.create(replicateDir, showWarnings=FALSE, recursive=TRUE)
	writeCladesXML(template, clusters3, tree, kenyanIntroductions, replicateDir, "10000")
}
# For each replicate, prepare the tip-swap analysis: copy the ".tre" files and the BEAST
# jar, then copy "All_clades.xml" while inserting one tipStateSwapOperator per clade
# right before the "</operators>" line.
for (h in seq_len(nberOfReplicates)) {
	replicateDir = paste0("Phylogeographic_replicates/Replicate_",h,"/")
	tipSwapDir = paste0("Phylogeographic_tipSwaps/Replicate_",h,"/")
	dir.create(file.path(tipSwapDir), showWarnings=FALSE, recursive=TRUE)
	# Literal matching: keep the file names containing ".tre" but not ".tree".
	files = list.files(replicateDir)
	files = files[which((grepl(".tre",files,fixed=TRUE))&(!grepl(".tree",files,fixed=TRUE)))]
	if (length(files) > 0) file.copy(paste0(replicateDir,files), paste0(tipSwapDir,files))
	file.copy(paste0(replicateDir,"beast_1105.jar"), paste0(tipSwapDir,"beast_1105.jar"))
	# NOTE(review): "All_clades.xml" is read here whereas the replicate XML is written
	# elsewhere in this script as "All_clades_NEW.xml" - presumably renamed by hand; confirm.
	template = scan(paste0(replicateDir,"All_clades.xml"), what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE)
	# Clade identifiers parsed from the '<taxa id="taxa_N">' lines of the XML file.
	lines = template[which(grepl("\t<taxa id=\"",template))]
	clusters3_IDs = as.numeric(gsub("\t<taxa id=\"taxa_","",gsub("\">","",lines)))
	sink(file=paste0(tipSwapDir,"All_clades.xml"))
	tryCatch({
		for (i in seq_along(template)) {
			if (grepl("</operators>",template[i])) {
				for (j in seq_along(clusters3_IDs)) {
					cat(paste0("\t\t<tipStateSwapOperator weight=\"2\" uniformRandomization=\"true\">","\n"))
					cat(paste0("\t\t\t<ancestralTreeLikelihood idref=\"location.treeLikelihood_",clusters3_IDs[j],"\"/>","\n"))
					cat(paste0("\t</tipStateSwapOperator>","\n"))
				}
			}
			cat(template[i],"\n")
		}
	}, finally = sink(NULL)) # release the sink even when an error occurs
}

# 7. Building the maximum clade consensus (MCC) tree of each replicate

nberOfReplicates = 10; wd = getwd()
runningNewAnalyses = TRUE
if (runningNewAnalyses) {
	# Run treeannotator on every ".trees" file of every replicate directory; the
	# "BEAST_1104/bin/treeannotator" path is relative to the replicate directory.
	# The initial working directory is restored even when an error occurs.
	tryCatch({
		for (i in seq_len(nberOfReplicates)) {
			setwd(paste0(wd,"/Phylogeographic_replicates/Replicate_",i,"/"))
			treeFiles = list.files(); treeFiles = gsub(".trees","",treeFiles[which(grepl(".trees",treeFiles))])
			for (j in seq_along(treeFiles)) {
				system(paste0("BEAST_1104/bin/treeannotator -burninTrees 101 -heights keep ",treeFiles[j],".trees ",treeFiles[j],".tree"), ignore.stdout=FALSE, ignore.stderr=FALSE)
			}
		}
	}, finally = setwd(wd))
}
setwd(wd)

# 8. Extracting the dispersal history of lineages embedded in trees

nberOfReplicates = 10; wd = getwd()
# For each replicate and each ".trees" file, write one "TreeExtractions_<j>.csv" per
# sampled tree into the "<file>_ext" directory: one row per branch with its two node
# indices and the locations at the start and at the end of the branch.
# The initial working directory is restored even when an error occurs.
tryCatch({
	for (h in seq_len(nberOfReplicates)) {
		setwd(paste0(wd,"/Phylogeographic_replicates/Replicate_",h))
		treesFiles = list.files(); treesFiles = treesFiles[which(grepl(".trees",treesFiles))]
		for (i in seq_along(treesFiles)) {
			extractionDir = gsub(".trees","_ext",treesFiles[i])
			dir.create(file.path(extractionDir), showWarnings=FALSE)
			# The extraction is skipped when its first output file already exists.
			if (!file.exists(paste0(extractionDir,"/TreeExtractions_1.csv"))) {
				# Trees 102 to 1001 are kept (presumably the first 101 are discarded as burn-in - TODO confirm).
				trees = readAnnotatedNexus(treesFiles[i]); trees = trees[102:1001]
				for (j in seq_along(trees)) {
					tree = trees[[j]]
					nberOfBranches = dim(tree$edge)[1]
					tab = matrix(nrow=nberOfBranches, ncol=4)
					colnames(tab) = c("node1","node2","startLoc","endLoc")
					tab[,"node1"] = tree$edge[,1]; tab[,"node2"] = tree$edge[,2]
					# Index of the parent branch of each branch (NA for the branches attached to the root).
					parentBranches = match(tree$edge[,1], tree$edge[,2])
					for (k in seq_len(nberOfBranches)) {
						tab[k,"endLoc"] = tree$annotations[[k]]$location
						if (!is.na(parentBranches[k])) {
							tab[k,"startLoc"] = tree$annotations[[parentBranches[k]]]$location
						} else {
							tab[k,"startLoc"] = tree$root.annotation$location
						}
					}
					write.csv(tab, paste0(extractionDir,"/TreeExtractions_",j,".csv"), row.names=FALSE, quote=FALSE)
				}
			}
		}
		setwd(wd)
	}
}, finally = setwd(wd))
nberOfExtractionFiles = 900
# For each replicate, stack the i-th extraction table of every clade into a single
# "All_clades_ext/TreeExtractions_<i>.csv" file.
# The initial working directory is restored even when an error occurs.
tryCatch({
	for (h in seq_len(nberOfReplicates)) {
		setwd(paste0(wd,"/Phylogeographic_replicates/Replicate_",h))
		dir.create(file.path("All_clades_ext"), showWarnings=FALSE); tab = NULL
		treesFiles = list.files(); treesFiles = treesFiles[which(grepl(".trees",treesFiles))]
		if (length(treesFiles) == 0) stop("No .trees file found for replicate ", h, call.=FALSE)
		extractionDirs = gsub(".trees","_ext",treesFiles)
		for (i in seq_len(nberOfExtractionFiles)) {
			# Read the i-th table of every clade, then bind them in one call.
			tab = do.call(rbind, lapply(extractionDirs, function(extractionDir) {
				read.csv(paste0(extractionDir,"/TreeExtractions_",i,".csv"))
			}))
			write.csv(tab, paste0("All_clades_ext/TreeExtractions_",i,".csv"), row.names=FALSE, quote=FALSE)
		}
		setwd(wd)
	}
}, finally = setwd(wd))
# Build (once) the transition-count matrices: for each replicate and each merged extraction
# table, count the rows going from a "startLoc" county (matrix row) to an "endLoc" county
# (matrix column). The result is cached in "Matrices.rds" and not recomputed if present.
if (!file.exists("Phylogeographic_replicates/Matrices.rds"))
	{
		nberOfCounties = length(sampled_counties)
		# Location labels in the extraction tables are matched against county names without spaces
		countyLabels = gsub(" ","",sampled_counties)
		matrices_list = list()
		for (h in 1:nberOfReplicates)
			{
				matrices = list()
				for (i in 1:nberOfExtractionFiles)
					{
						tab = read.csv(paste0("Phylogeographic_replicates/Replicate_",h,"/All_clades_ext/TreeExtractions_",i,".csv"), head=TRUE)
						# Positions of the start/end locations among the sampled counties (NA if not a sampled county)
						origins = match(tab[,"startLoc"], countyLabels)
						destinations = match(tab[,"endLoc"], countyLabels)
						# Rows with an unmatched start or end location do not contribute to any cell
						matched = (!is.na(origins))&(!is.na(destinations))
						counts = table(factor(origins[matched], levels=1:nberOfCounties),
									   factor(destinations[matched], levels=1:nberOfCounties))
						mat = matrix(as.numeric(counts), nrow=nberOfCounties, ncol=nberOfCounties,
									 dimnames=list(sampled_counties, sampled_counties))
						matrices[[i]] = mat
					}
				matrices_list[[h]] = matrices
			}
		saveRDS(matrices_list, "Phylogeographic_replicates/Matrices.rds")
	}
# For each replicate, compute two tables of Bayes factor (BF) supports for every ordered
# pair of distinct sampled counties, based on the "location.indicators.<from>.<to>" columns
# of the log files:
#  - BFs1: posterior odds of the indicator being 1 in the replicate log, divided by the
#          odds q/(1-q) derived from K (presumably the prior odds - TODO confirm);
#  - BFs2: same posterior odds, divided by the corresponding odds obtained from the
#          "Phylogeographic_tipSwaps" log of the same replicate.
# Both tables are rounded to one decimal and written as "BF_values.csv" in the
# "Phylogeographic_replicates" and "Phylogeographic_tipSwaps" replicate folders, respectively.
for (h in 1:nberOfReplicates)
	{
		# Lines containing "# " are dropped and the remaining lines saved as "All_clades3.log",
		# which is then read as a tab-separated table; only rows 102 to 1001 are kept
		log1 = scan(paste0("Phylogeographic_replicates/Replicate_",h,"/All_clades1.log"), what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE)
		write(log1[which(!grepl("# ",log1))], paste0("Phylogeographic_replicates/Replicate_",h,"/All_clades3.log"))
		log1 = read.table(paste0("Phylogeographic_replicates/Replicate_",h,"/All_clades3.log"), header=TRUE, sep="\t"); log1 = log1[102:1001,]
		log2 = scan(paste0("Phylogeographic_tipSwaps/Replicate_",h,"/All_clades1.log"), what="", sep="\n", quiet=TRUE, blank.lines.skip=FALSE)
		write(log2[which(!grepl("# ",log2))], paste0("Phylogeographic_tipSwaps/Replicate_",h,"/All_clades3.log"))
		log2 = read.table(paste0("Phylogeographic_tipSwaps/Replicate_",h,"/All_clades3.log"), header=TRUE, sep="\t"); log2 = log2[102:1001,]
		BFs1 = matrix(nrow=length(sampled_counties), ncol=length(sampled_counties))
		BFs2 = matrix(nrow=length(sampled_counties), ncol=length(sampled_counties))
		row.names(BFs1) = sampled_counties; colnames(BFs1) = sampled_counties
		row.names(BFs2) = sampled_counties; colnames(BFs2) = sampled_counties
		# K and q do not depend on the pair of counties, so they are computed once per replicate
		K = 30 # length(locations)*(length(locations)-1) # K should be divided by 2 in the "symmetric" case
		q = (log(2)+K-1)/(K*(K-1))
		for (i in seq_along(sampled_counties))
			{
				for (j in seq_along(sampled_counties))
					{
						if (i != j)
							{
								# Spaces in county names are replaced by dots to match the column names of the log tables
								colName = paste0("location.indicators.",gsub(" ",".",sampled_counties[i]),".",gsub(" ",".",sampled_counties[j]))
								index1 = which(colnames(log1)==colName); index2 = which(colnames(log2)==colName)
								# A missing column makes the frequency below equal to 0 (BF of 0, Inf or NaN):
								# the values are still computed as before, but the problem is now reported
								if ((length(index1) != 1)||(length(index2) != 1))
									{
										warning(paste0("Replicate ",h,": indicator column '",colName,"' found ",length(index1),
													   " time(s) in the replicate log and ",length(index2)," time(s) in the tip swap log"), call.=FALSE)
									}
								# Frequencies at which the indicator equals 1 among the retained rows
								p1 = sum(log1[,index1]==1)/dim(log1)[1]
								p2 = sum(log2[,index2]==1)/dim(log2)[1]
								BFs1[i,j] = (p1/(1-p1))/(q/(1-q))
								BFs2[i,j] = (p1/(1-p1))/(p2/(1-p2))
							}
					}
			}
		write.table(round(BFs1,1), paste0("Phylogeographic_replicates/Replicate_",h,"/BF_values.csv"), sep=",", quote=FALSE)
		write.table(round(BFs2,1), paste0("Phylogeographic_tipSwaps/Replicate_",h,"/BF_values.csv"), sep=",", quote=FALSE)
	}

# 9. Visualising the dispersal history of viral lineages among counties

# Background polygons: Africa (from the continents shapefile) and Kenya, each also cropped
# to the extent 36-43 (x) / -5-1 (y), plus the level-1 GADM polygons of the sampled counties
# N.B.: getData() is presumably raster::getData(), which needs to retrieve the GADM files - TODO confirm
continents = shapefile("All_continents_shapefile/Continents.shp")
africa = subset(continents, CONTINENT=="Africa"); africa_cropped = crop(africa, extent(36,43,-5,1))
kenya = getData("GADM", country="KEN", level=0); kenya_cropped = crop(kenya, extent(36,43,-5,1))
counties = getData("GADM", country="KEN", level=1)
selected_counties = subset(counties, counties@data[,"NAME_1"]%in%sampled_counties)
# County centroids, re-ordered to follow the order of "sampled_counties"
centroids = coordinates(selected_counties); matrices_mean = list()
row.names(centroids) = selected_counties@data[,"NAME_1"]; centroids = centroids[sampled_counties,]
matrices_list = readRDS("Phylogeographic_replicates/Matrices.rds")
# For each replicate: sum the transition-count matrices over all extraction files, set to NA
# the off-diagonal cells whose BF support is below 20, and divide by the number of files
for (h in 1:nberOfReplicates)
	{
		mat = matrix(0, nrow=length(sampled_counties), ncol=length(sampled_counties))
		for (i in 1:nberOfExtractionFiles) mat = mat+matrices_list[[h]][[i]]
		# The BF values used for the filtering are those saved in "Phylogeographic_tipSwaps";
		# the former read of "Phylogeographic_replicates/.../BF_values.csv" was immediately
		# overwritten by this one and has thus been removed (no change in the results)
		BFs = read.csv(paste0("Phylogeographic_tipSwaps/Replicate_",h,"/BF_values.csv"))
		stopifnot(all(dim(BFs) == dim(mat)))
		# Cells to discard: available (non-NA) BF value below 20, diagonal always kept
		unsupported = as.matrix(BFs) < 20
		unsupported[is.na(unsupported)] = FALSE; diag(unsupported) = FALSE
		mat[unsupported] = NA
		matrices_mean[[h]] = mat/nberOfExtractionFiles
	}
# Multi-panel PDF (3 x 4 layout): a first panel with Africa, Kenya and the sampled counties,
# a second panel with the sampled counties and the legend (circles and arrows), then one
# panel per replicate displaying the mean transition matrix: diagonal values as circles
# at the county centroids, off-diagonal values >= 1 as curved arrows between centroids
if (showingPlots)
	{
		# Scaling ranges shared by the legend and all replicate panels: they have to be computed
		# before drawing the legend, which uses them (they were previously defined after it)
		#  - minVals1/maxVals1: range of the diagonal values across replicates (circle sizes)
		#  - minVals2/maxVals2: range of the non-NA off-diagonal values across replicates (arrows)
		minVals1 = min(diag(matrices_mean[[1]])); maxVals1 = max(diag(matrices_mean[[1]]))
		mat = matrices_mean[[1]]; diag(mat) = NA; minVals2 = min(mat, na.rm=TRUE); maxVals2 = max(mat, na.rm=TRUE)
		for (h in seq_len(nberOfReplicates)[-1]) # empty when there is a single replicate
			{
				mat1 = matrices_mean[[h]]; mat2 = mat1; diag(mat2) = NA
				if (minVals1 > min(diag(mat1))) minVals1 = min(diag(mat1))
				if (maxVals1 < max(diag(mat1))) maxVals1 = max(diag(mat1))
				if (minVals2 > min(mat2,na.rm=TRUE)) minVals2 = min(mat2,na.rm=TRUE)
				if (maxVals2 < max(mat2,na.rm=TRUE)) maxVals2 = max(mat2,na.rm=TRUE)
			}
		pdf(paste0("Phylogeographic_replicates/Replicates_NEW.pdf"), width=9, height=6.0) # dev.new(width=11.0, height=3.9)
		par(mfrow=c(3,4), oma=c(0.5,0.5,0.5,0.5), mar=c(0,0,0,0), lwd=0.2, col="gray30")
		# Panel 1: location of the sampled counties within Kenya and Africa
		plot(africa, col="gray95", border=NA, lwd=0.01)
		plot(kenya, col="gray90", border=NA, lwd=0.01, add=TRUE)
		plot(selected_counties, col="#C6AF42", border=NA, lwd=0.01, add=TRUE)
		plot(kenya, col=NA, border="gray60", lwd=0.50, add=TRUE)
		# Panel 2: sampled counties with the legend, i.e. circles for the values 50, 100, ..., 250
		# and straight arrows for the values 5, 10, 20 and 30, scaled as in the replicate panels
		plot(selected_counties, col="#C6AF4250", border="gray75", lwd=0.75)
		points(cbind(rep(41,5),rep(-0.7,5)), cex=10*((seq(50,250,50)-minVals1)/(maxVals1-minVals1)), pch=1, col="#4D4D4D", lwd=0.3)
		legendValues = c(5,10,20,30); legendLatitudes = c(-3.0,-3.5,-4.0,-4.5)
		for (k in seq_along(legendValues))
			{
				vS = legendValues[k]; LWD = (((vS-minVals2)/(maxVals2-minVals2))*3)+0.1; arrow = (0.1*(vS/maxVals2))+0.04
				curvedarrow(cbind(40.5,legendLatitudes[k]), cbind(41.5,legendLatitudes[k]), arr.length=arrow*1.3, arr.width=arrow, lwd=LWD, lty=1,
							lcol="gray30", arr.col="gray30", arr.pos=0.52, curve=0, dr=NA, endhead=FALSE, arr.type="triangle")
			}
		# Following panels: one map per replicate
		for (h in seq_len(nberOfReplicates))
			{
				# if (h == 3) plot.new()
				# if (h == 7) plot(selected_counties, col="#C6AF4250", border="gray75", lwd=0.75)
				mat = matrices_mean[[h]]
				# The first (invisible) plot only sets the plotting region to the sampled counties
				plot(selected_counties, col=NA, border=NA, lwd=0.25)
				plot(africa_cropped, col="gray95", border=NA, lwd=0.01, add=TRUE)
				plot(kenya_cropped, col="gray90", border="gray60", lwd=0.50, add=TRUE)
				plot(selected_counties, col="#C6AF4250", border="gray75", lwd=0.75, add=TRUE)
				points(centroids, cex=10*((diag(mat)-minVals1)/(maxVals1-minVals1)), pch=16, col="#4D4D4D50")
				for (i in seq_along(sampled_counties))
					{
						for (j in seq_along(sampled_counties))
							{
								# NA cells (set to NA upstream) and values below 1 are not drawn
								if ((i != j)&&(!is.na(mat[i,j]))&&(mat[i,j] >= 1))
									{
										LWD = (((mat[i,j]-minVals2)/(maxVals2-minVals2))*3)+0.1; arrow = (0.1*(mat[i,j]/maxVals2))+0.04
										curvedarrow(centroids[i,], centroids[j,], arr.length=arrow*1.3, arr.width=arrow, lwd=LWD, lty=1,
													lcol="gray30", arr.col="gray30", arr.pos=0.5, curve=0.15, dr=NA, endhead=FALSE, arr.type="triangle")
									}
							}
					}
			}
		dev.off()
	}

